import os
import pandas as pd
from glob import glob
import cPickle

# Load the pickled list of records.  Pickle streams are binary data, so the
# file is opened in 'rb' mode (the previous 'ru' mode string is not a valid
# binary mode), and the `with` block closes the handle once loading is done.
# NOTE: unpickling can execute arbitrary code -- only load a trusted file.
with open('train_image_label.pkl', 'rb') as pkl_file:
    listpkl = cPickle.load(pkl_file)

paths = []
labels = []
num = len(listpkl)

# Each record is indexed as item[0] (image path) and item[1] (label).
for i, item in enumerate(listpkl):
    # Progress line "<index> / <total>"; the parenthesised single-argument
    # form prints the same text under Python 2 and Python 3.
    print('%s / %s' % (i, num))
    # Remove every 'ms1m_aligned/' occurrence from the stored path.
    paths.append(item[0].replace('ms1m_aligned/', ''))
    labels.append(item[1])

# NOTE(review): the column order of a dict-built DataFrame depends on the
# Python/pandas version (sorted keys vs. insertion order), which also decides
# the column order of the header-less train_list_ori.csv -- confirm which
# order downstream readers expect.
listpd = pd.DataFrame({'paths': paths,
                       'labels': labels})
listpd.to_csv('train_list_ori.csv', header=None, index=False)
print(listpd.head())
print(listpd.shape)
# listpd = pd.read_csv('MS-Celeb-1M_clean_list.txt',header=None,names=['file_path','label'],sep=' ')
#
#
# Directory whose immediate sub-directories are scanned for .jpg files.
image_folder = '/media/hszc/data1/face_data/ms1m/ms1m_aligned'

# Every image found on disk, recorded as '<subfolder>/<file name>'.
valid_paths = []

for subfolder in os.listdir(image_folder):
    jpg_pattern = os.path.join(image_folder, subfolder, '*.jpg')
    # Keep only the last '/'-separated component of each match and prefix it
    # with the sub-directory name.
    valid_paths.extend(
        os.path.join(subfolder, image_path.rsplit('/', 1)[-1])
        for image_path in glob(jpg_pattern)
    )

# One row per image found on disk, flagged with isvalid = 1.
temp = pd.DataFrame({'paths': valid_paths})
temp['isvalid'] = 1

# Left-join the flags onto the full list; rows without a match get NaN,
# which fillna turns into 0 (fillna applies to every column of the result).
merged = pd.merge(listpd, temp, on='paths', how='left')
listpd = merged.fillna(0)

# Keep only the rows whose image exists, as (paths, labels) columns.
is_on_disk = listpd['isvalid'] == 1
valid_list = listpd.loc[is_on_disk, ['paths', 'labels']]

print(valid_list.shape)
valid_list.to_csv('train_list.csv', header=None, index=False)

# print listpd